
clear
/*#delimit;*/
set mem 1000m
pause on
set more off

cd "/PATHFILE/"

tempfile all nearwell nearapi nearlease docket operator firm aaaa aaaaval 
tempfile wellpadid prod pipeline opdrill lease lease_firm lease_api

********************************************************************************

use `"`pathout'NearbyWellProd2003.dta"'
sort wellpadID
save `nearwell',replace

use `"`pathout'NearbyWellProdApi2003.dta"'
sort api
save `nearapi',replace

use `"`pathout'NearbyWellProd.dta"'
sort wellpadID
merge wellpadID using `nearwell'
tab _merge
drop _merge

sort wellpadID
save `nearwell',replace

use `"`pathout'NearbyWellProdApi.dta"'
sort api
merge api using `nearapi'
tab _merge
drop _merge

drop wellpad wellpadID
duplicates drop

sort api
save `nearapi',replace

use `"`pathout'NearbyLeaseAttributes.dta"'

sort wellpadID firmNUM
save `nearlease',replace

********************************************************************************
* Firm docket variables

use `"`pathout'DocketAggData.dta"'

*drop if fileyear > 2003
drop ObsGroup *_lease*
duplicates drop

sort firm_1 fileyear filemonth county
by firm_1 fileyear filemonth: gen counter = _n

foreach var of varlist *op_sum {
bysort firm_1: egen temp = mean(`var') if counter == 1
sort firm_1 temp
by firm_1: carryforward temp,replace

replace `var' = temp
drop temp
}
drop counter

gen other = 0
replace other = 1 if county != "tarrant"

foreach var of varlist *opCounty_sum {
bysort firm_1: egen temp = mean(`var') if other == 0
sort firm_1 temp
by firm_1: carryforward temp,replace

replace `var' = temp
drop temp

bysort firm_1: egen `var'_oth = mean(`var') if other == 1
sort firm_1 `var'_oth
by firm_1: carryforward `var'_oth,replace
}

gsort firm_1 county -fileyear -filemonth
duplicates drop firm_1 county, force
drop *_sum

foreach var of varlist *_opCounty_csum {
	bysort firm_1: egen `var'_oth = sum(`var') if other == 1
	gsort firm_1 -other
	by firm_1: carryforward `var'_oth, replace
}
drop if other == 1
drop other
replace firm_1 = "fortworth" if firm_1 == "fort worth"
replace firm_1 = "titan operating" if firm_1 == "titan"
replace firm_1 = "williamsgulfcoast" if firm_1 == "williams gulf coast"

drop *_oth

sort firm_1
save `docket',replace

********************************************************************************
* Merge firm names

use `"`pathout'FirmIDs.dta"'

sort firm_1
save `operator',replace

replace firm_1 = "smallfirm" if firmNUM == 0
duplicates drop

sort firmNUM
save `firm',replace

********************************************************************************
* Appraisal values

use `"`pathout'AppraisalValues_2003.dta"'
sort account_num
save `aaaaval',replace

* Count of residential parcels in subdivision and abstract

use `"`pathtad'AAAA_spaceCount.dta"'
sort account_num
merge account_num using `aaaaval'
tab _merge
drop _merge

sort account_num
save `aaaaval',replace

* Tract-level char for given household

use `"`pathtad'AAAA_tractChar.dta"'
sort account_num
merge account_num using `aaaaval'
tab _merge
drop _merge

sort account_num
save `aaaaval',replace

********************************************************************************
* Merge wellpad/ api IDs for each property

use `"`pathtad'AAAA_wellpadID_01232017.dta"'

gen near_dist_wellpad_inv = 1/near_dist_wellpad

duplicates tag account_num wellpadID, gen(t)
drop if t > 0
drop t

sort account_num
save `wellpadid',replace

********************************************************************************

use `"`pathtad'AAAAall.dta"'

drop n1 n2 n3 fullmerge unique overlap_flag deed_date notice_date instrument appraisal_date appraisal_val from_accts land_val improve_val total_val year prop_class tad_map mapsco exemption merge2013 merge_parcel spec2 spec1 special_dist

keep account_num land_acres land_sqft
duplicates drop account_num, force
sort account_num
merge account_num using `wellpadid'
tab _merge
drop if _merge != 3
drop _merge

duplicates drop account_num, force

bysort wellpadID: egen land_sqft_wp = sum(land_sqft)  
bysort wellpadID: egen land_acre_wp = sum(land_acres) 
bysort api: egen land_sqft_api = sum(land_sqft)  
bysort api: egen land_acre_api = sum(land_acres) 

sort account_num
save `aaaa',replace

********************************************************************************
* Merge lease attributes

use `"`pathout'LeaseDataForRegressions.dta"'

********************************************************************************

bysort api: egen minyear_api = min(instyear)
bysort api: egen maxyear_api = max(instyear)
bysort wellpadID: egen minyear_wp = min(instyear)
bysort wellpadID: egen maxyear_wp = max(instyear)

gen tag = 0
replace tag = 1 if minyear_api >= 2006 & maxyear_api <= 2008
replace tag = 1 if minyear_api >= 2003 & maxyear_api <= 2008

gen counter = 1
bysort api: egen sign = sum(counter)
bysort api: egen sign_2008 = sum(counter) if instyear < 2008
sort api sign_2008
by api: carryforward sign_2008, replace
bysort api: egen sign_2009 = sum(counter) if instyear < 2009
sort api sign_2009
by api: carryforward sign_2009, replace
bysort api: egen sign_2010 = sum(counter) if instyear < 2010
sort api sign_2010
by api: carryforward sign_2010, replace

gen sign_2008_frac = sign_2008/sign
gen sign_2009_frac = sign_2009/sign
gen sign_2010_frac = sign_2010/sign

drop counter

********************************************************************************
drop totalsigned firmsigned firmActiveByWellpad

gen counter = 1
bysort wellpadID: egen totalsigned = sum(counter)
bysort wellpadID firmNUM: egen firmsigned = sum(counter)

egen firmwpgroup = group(wellpadID firmNUM)
bysort firmwpgroup: gen firmwpcount = _n
sort wellpadID
by wellpadID: egen firmActiveByWellpad = sum(counter) if firmwpcount == 1
sort wellpadID firmActiveByWellpad
by wellpadID: carryforward firmActiveByWellpad, replace

drop counter firmwpgroup firmwpcount

********************************************************************************
drop *_fwp
foreach var of varlist termmonths royalty fulllease legallease surfacelease badslease externlease waterlease externlease_env fulllease_env{
	bysort firmNUM wellpadID: egen `var'_fwp = mean(`var') if `var' != .
	sort firmNUM wellpadID `var'
	by firmNUM wellpadID: carryforward `var',replace
	bysort firmNUM: egen `var'_f = mean(`var') if `var' != .
	sort firmNUM `var'
	by firmNUM: carryforward `var',replace
}
********************************************************************************

save `all',replace

keep api record instdate closeProd_date closeIssue_date timeBetween_* sign_* totalsigned firmActiveByWellpad
duplicates drop
sort api record instdate
save `lease_api',replace

clear
use `all'

keep firmNUM wellpadID record leaseAbstract instdate account_num *lease royalty termmonths
duplicates drop
sort account_num record leaseAbstract instdate
save `lease',replace

clear
use `all'

keep firmNUM wellpadID firmsigned *_f *_fwp
duplicates drop firmNUM wellpadID, force
sort firmNUM wellpadID
save `lease_firm',replace

********************************************************************************

* Merge production values

use `"`pathout'MATLAB_decision_stacked_match_full_prod.dta"'

sort leasegroup firmNUM
save `prod'

* Merge pipeline distance

use `"`pathtad'AAAA_pipeline.dta"'

sort account_num
save `pipeline',replace

* Merge operator drilling attributes from 2003

use `"`pathout'OperatorDrilling.dta"'

rename operator firm_1
sort firm_1
save `opdrill',replace

********************************************************************************

use `"`pathout'MATLAB_decision_stacked_match_full.dta"'

sort account_num firmNUM
egen firmcount = group(firmNUM)
sum firmcount

* Merge wellpad/ api IDs for each property

sort account_num
merge account_num using `wellpadid'
tab _merge
drop if _merge != 3
drop _merge

*drop if wellpad_dist > 4

* Merge pipeline distance

sort account_num
merge account_num using `pipeline'
tab _merge
drop if _merge == 2
drop _merge

* Merge firm names

sort firmNUM
merge firmNUM using `firm'
tab _merge
drop if _merge == 2
drop _merge

order firm_1 wellpadID

* Merge operator drilling attributes from 2003

sort firm_1
merge firm_1 using `opdrill'
tab _merge
drop if _merge == 2
drop _merge

* Merge production values

sort leasegroup firmNUM
merge leasegroup firmNUM using `prod'
tab _merge
drop if _merge == 2
drop _merge

* Merge lease characteristics

sort firmNUM wellpadID
merge firmNUM wellpadID using `lease_firm'
tab _merge
drop if _merge == 2
drop _merge

sort account_num record leaseAbstract instdate
merge account_num record leaseAbstract instdate using `lease'
tab _merge
drop if _merge == 2 | _merge == 1
drop _merge

sort api record instdate
merge api record instdate using `lease_api'
tab _merge
drop if _merge == 2
drop _merge

* Merge nearby drilling and leasing activity

sort wellpadID
merge wellpadID using `nearwell'
tab _merge
drop if _merge == 2
drop _merge

sort api
merge api using `nearapi'
tab _merge
drop if _merge == 2
drop _merge

sort wellpadID firmNUM
merge wellpadID firmNUM using `nearlease'
tab _merge
drop if _merge == 2
drop _merge

********************************************************************************

sort wellpadID totalsigned
by wellpadID: carryforward totalsigned, replace
by wellpadID: carryforward firmActiveByWellpad, replace

sort wellpadID firmNUM firmsigned
by wellpadID firmNUM: carryforward firmsigned, replace

foreach var of varlist termmonths_f - waterlease_f {
sort firmNUM `var'
by firmNUM: carryforward `var',replace
}
sort record api closeProd_date 
by record api: carryforward closeProd_date, replace
sort record api closeIssue_date 
by record api: carryforward closeIssue_date, replace
foreach var of varlist timeBetween_* {
sort record instdate `var'
by record instdate: carryforward `var',replace
} 
foreach var of varlist sign_* {
sort api `var'
by api: carryforward `var',replace
}
********************************************************************************

* Merge total land size

sort account_num
merge account_num using `aaaa'
tab _merge
drop if _merge != 3
drop _merge

sort account_num
merge account_num using `aaaaval'
tab _merge
drop if _merge == 2
drop _merge

sort firm_1
merge firm_1 using `docket'
tab _merge
drop if _merge == 2
drop _merge

gen operatorDum = 0
replace operatorDum = 1 if firm_1 == operator

drop record instdate leaseAbstract 
drop operator api_own operator_own api_oth operator_oth
drop api_min sublease county

order leasegroup firmNUM wellpadID
drop if api_cumgas == .
sort leasegroup firmNUM

describe

********************************************************************************

replace wellpadID_cumgas_sum = wellpadID_cumgas_sum/merge_count_wp
replace wellpadID_pxcumgas_sum = wellpadID_pxcumgas_sum/merge_count_wp
drop merge_count_wp

replace wellpadID_cumgas_t_sum = wellpadID_cumgas_t_sum/merge_count_wp_t
replace wellpadID_pxcumgas_t_sum = wellpadID_pxcumgas_t_sum/merge_count_wp_t
drop merge_count_wp_t

replace api_cumgas_sum = api_cumgas_sum/merge_count_api
replace api_pxcumgas_sum = api_pxcumgas_sum/merge_count_api
drop merge_count_api

replace api_cumgas_t_sum = api_cumgas_t_sum/merge_count_api_t
replace api_pxcumgas_t_sum = api_pxcumgas_t_sum/merge_count_api_t
drop merge_count_api_t

replace termmonths_fwp_sum = termmonths_fwp_sum/merge_count_term
drop merge_count_term

replace royalty_fwp_sum = royalty_fwp_sum/merge_count_royalty
drop merge_count_royalty

replace fulllease_fwp_sum = fulllease_fwp_sum/merge_count_clause
drop merge_count_clause


********************************************************************************

drop legallease_fwp_sum surfacelease_fwp_sum badslease_fwp_sum externlease_fwp_sum waterlease_fwp_sum externlease_env_fwp_sum fulllease_env_fwp_sum 
drop termmonths_wp_sum royalty_wp_sum legallease_wp_sum surfacelease_wp_sum badslease_wp_sum externlease_wp_sum fulllease_wp_sum waterlease_wp_sum externlease_env_wp_sum fulllease_env_wp_sum 
drop *_own *_oth
drop *oil*
drop market_time firmcount pidn taxpin
drop near_fid near_angle objectid calculated
drop firmsigned totalsigned fileyear filemonth
drop entity_* apiNUM
drop rule37* rule38* rule86* *_2003 *2010_2000 *2000_2010
drop unitization_* fieldrule_* pooling_*
drop api_pxcumoil_abst_med api_pxcumoil_wp_med api_pxcumgas_abst_med api_pxcumgas_wp_med
drop *firmCty_* *_csum_oth *_inv api_24mon*
drop samp*

********************************************************************************
* Counter-factual lease quality variables

foreach var of varlist termmonths royalty fulllease legallease surfacelease badslease externlease waterlease {
	
	bysort wellpadID firm_1: egen check = mean(`var') if `var' != . & decision == 1
	sort wellpadID firm_1 check
	by wellpadID firm_1: carryforward check, replace
	bysort firm_1: egen temp = mean(`var') if `var' != . & decision == 1
	sort firm_1 temp
	by firm_1: carryforward temp, replace
	replace check = temp if check == .
	
	replace `var'_fwp = check
	replace `var'_fwp = 0 if `var'_fwp == .

	drop check temp
}

gen fulllease_cf = fulllease + 1/17
gen fulllease_cf_env     = fulllease + 1/17 if environ_bun == 0 & decision == 1
gen fulllease_cf_noise   = fulllease + 1/17 if noise_bun == 0   & decision == 1
gen fulllease_cf_subsurf = fulllease - 1/17 if subsurfease_bun == 0 & decision == 1
gen fulllease_cf_surfdam = fulllease + 1/17 if surfdamage_bun == 0  & decision == 1

gen externlease_cf = externlease + 1/5
gen externlease_cf_env = externlease + 1/5 if environ_bun == 0 & decision == 1
gen externlease_cf_noise = externlease + 1/5 if noise_bun == 0 & decision == 1

foreach var of varlist fulllease_cf* externlease_cf* {

	bysort wellpadID firm_1: egen check = mean(`var') if `var' != . & decision == 1
	sort wellpadID firm_1 check
	by wellpadID firm_1: carryforward check, replace
	bysort firm_1: egen temp = mean(`var') if `var' != . & decision == 1
	sort firm_1 temp
	by firm_1: carryforward temp, replace
	replace check = temp if check == .
	
	gen `var'_fwp = check
	replace `var'_fwp = 0 if `var'_fwp == .

	drop check temp
}
order royalty termmonths *lease royalty_hat termmonths_hat *lease_hat1 royalty_fwp termmonths_fwp *lease_fwp *lease_cf* , last

drop insttype extbonus bonus nytbonus nytsample attributemerge_2017 daysprior daysaftercess *_bun *_bin
drop samebonus compressionstation disposalwell fence forcemaj injectionfluid leaseAttributeMerge nyFirstSample pugh verticalpugh
drop termmonths royalty fulllease legallease surfacelease badslease externlease waterlease

********************************************************************************
* Firm and Landowner profit

gen exp_royalty_est = 0.9*royalty_hat1*exp_royalty
gen exp_royalty_bonus_est = exp_royalty_est + bonus_hat1
replace bonus_hatorg = bonus_hatorg/land_acres
replace exp_royalty_bonus_est = . if exp_royalty_bonus_est == 0
replace exp_royalty_bonus_est = exp_royalty_est if exp_royalty_bonus_est == .

gen expFirm_royalty_est = 0.9*(1-royalty_hat1)*exp_royalty
gen expFirm_royalty_bonus_est = exp_royalty_est - bonus_hat1
replace expFirm_royalty_bonus_est = . if expFirm_royalty_bonus_est == 0
replace expFirm_royalty_bonus_est = expFirm_royalty_est if expFirm_royalty_bonus_est == .

bysort api: egen checkapi = max(exp_royalty_est)
drop if checkapi > 60000
drop checkapi

bysort api: egen checkapi = min(exp_royalty_est)
drop if checkapi == 0
drop checkapi

replace operatorDum = 0
replace operatorDum = 1 if op_prod_wells != 0
replace operatorDum = 0 if op_prod_wells == .
replace operatorDum = . if operatorDum == 0
sort firm_1 operatorDum
by firm_1: carryforward operatorDum, replace
replace operatorDum = 0 if operatorDum == .

replace landman = 0
replace landman = 1 if operatorDum == 0 & firm_1 != "smallfirm"

replace operatorLarge = 0
replace operatorLarge = 1 if firm_1 == "chesapeake" | firm_1 == "xto" | firm_1 == "dale" | firm_1 == "carrizo"

********************************************************************************
* Re-calculate total leases to wellpad and firm signed

drop totalsigned firmsigned api_totalsigned
bysort wellpadID: egen totalsignedWp = sum(decision)
bysort wellpadID firmNUM: egen firmsignedWp = sum(decision)
bysort api: egen totalsignedApi = sum(decision)
bysort api firmNUM: egen firmsignedApi = sum(decision)
bysort firmNUM: egen firmsigned = sum(decision)

sort decision wellpadID firmNUM
by decision wellpadID firmNUM: gen temp = _n if decision == 1
sort wellpadID temp
by wellpadID: egen firmcountWp = sum(temp) if temp == 1
sort wellpadID firmcountWp
by wellpadID: carryforward firmcountWp, replace
drop temp

sort decision api firmNUM
by decision api firmNUM: gen temp = _n if decision == 1
sort api temp
by api: egen firmcountApi = sum(temp) if temp == 1
sort api firmcountApi
by api: carryforward firmcountApi, replace
drop temp

gen share = firmsignedWp/totalsignedWp
gen shareAPI = firmcountApi/totalsignedApi

********************************************************************************
* Calculate sample averages and stadard deviations for observable variables for matching model

* Lease obs variables
bysort leasegroup: gen obsCount = _n

local leaseobs = "termmonths_hat1 - externlease_hat1 land_sqft near_dist_pipe well_lateral_dist expFirm_royalty_est expFirm_royalty_bonus_est exp_royalty_est exp_royalty_bonus_est"

foreach var of varlist `leaseobs' {
sum `var' if obsCount == 1, det
gen `var'_minmax = (`var' - r(min))/(r(max) - r(min))
sum `var'_minmax, det
}
drop obsCount

* Wellpad obs variables
bysort wellpadID firm_1: gen wellCount = _n

local leaseobs = "*_fwp well_lateral_dist"

foreach var of varlist `leaseobs' {
sum `var' if wellCount == 1, det
replace `var' = (`var' - r(min))/(r(max) - r(min))
sum `var', det
}
drop wellCount

* Wellpad obs variables
bysort wellpadID: gen wellCount = _n

local leaseobs = "totalsignedWp firmcountWp parcelToWellpad"

foreach var of varlist `leaseobs' {
sum `var' if wellCount == 1, det
gen `var'_minmax = (`var' - r(min))/(r(max) - r(min))
sum `var'_minmax, det
}
drop wellCount

* Api obs variables
bysort api: gen apiCount = _n

local leaseobs = "totalsignedApi firmcountApi parcelToApi"

foreach var of varlist `leaseobs' {
sum `var' if apiCount == 1, det
gen `var'_minmax = (`var' - r(min))/(r(max) - r(min))
sum `var'_minmax, det
}
drop apiCount

* Abstract obs variables
bysort abst_no: gen abstCount = _n

local leaseobs = "land_avg abst_no_app_med"

foreach var of varlist `leaseobs' {
sum `var' if abstCount == 1, det
replace `var' = (`var' - r(min))/(r(max) - r(min))
sum `var', det
}
drop abstCount

* Firm operator obs variables
bysort firm_1: gen firmCount = _n

foreach var of varlist op_prod_wells {
sum `var' if firmCount == 1, det
replace `var' = (`var' - r(min))/(r(max) - r(min))
sum `var', det
}
drop firmCount

* Firm time obs variables
bysort firm_1 instyear instmonth: gen firmCount = _n

foreach var of varlist enforce_firm_csum {
sum `var' if firmCount == 1, det
replace `var' = (`var' - r(min))/(r(max) - r(min))
sum `var', det
}
drop firmCount

********************************************************************************

gen well_dist_largeOp = operatorLarge*well_lateral_dist_minmax
gen land_sqft_largeOp = operatorLarge*land_sqft_minmax
replace share = firmsignedApi/totalsignedApi

label var parcelToWellpad_minmax "Parcels to Drill"
label var abst_no_app_med "Appraisal Val"
label var land_sqft_minmax "Land (sqft)"
label var near_dist_pipe_minmax "Pipe Dist. (km)"
label var well_lateral_dist_minmax "Well Dist. (km)"
label var parcelIntLat "Lateral Intersects"

label var op_prod_wells "Well Ct."
label var well_dist_largeOp "Well Dist.*Large Op."
label var land_sqft_largeOp "Land*Large Op."
label var enforce_firm_csum "TRC Enforcement"

label var exp_royalty_bonus_est_minmax "Royalty + Bonus (Landowner)"
label var expFirm_royalty_bonus_est_minmax "Royalty + Bonus (Firm)"
label var exp_royalty_est_minmax "Royalty (Landowner)"
label var expFirm_royalty_est_minmax "Royalty (Firm)"

********************************************************************************
* Table 9: Parcel & Firm Summary Statistics

local firm_char = "op_prod_wells well_dist_largeOp land_sqft_largeOp enforce_firm_csum"

sum landman operatorLarge if decision == 1
sum expFirm_royalty_est expFirm_royalty_bonus_est

* Parcel Characteristics
sum bonus_hat1 bonus_frac if decision == 1
sum exp_royalty_est exp_royalty_bonus_est
sum bonus_hat1 bonus_frac if decision == 1

********************************************************************************
* Table 10: Assortativity (top panel)

local parcel_char = "parcelToWellpad_minmax abst_no_app_med land_sqft_minmax  landman operatorLarge near_dist_pipe_minmax well_lateral_dist_minmax parcelIntLat"

reg exp_royalty_bonus_est_minmax fulllease_fwp `parcel_char' if decision == 1
reg fulllease_fwp expFirm_royalty_bonus_est_minmax `parcel_char' if decision == 1

reg exp_royalty_est_minmax fulllease_fwp `parcel_char' if decision == 1
reg fulllease_fwp expFirm_royalty_est_minmax `parcel_char' if decision == 1

********************************************************************************
* Table 10: Assortativity (bottom panel)

local firm_char = "op_prod_wells well_dist_largeOp land_sqft_largeOp enforce_firm_csum"

reg expFirm_royalty_bonus_est_minmax fulllease_fwp `firm_char' if decision == 1
reg fulllease_fwp exp_royalty_bonus_est_minmax `firm_char' if decision == 1
reg share fulllease_fwp exp_royalty_bonus_est_minmax `firm_char' if decision == 1

reg expFirm_royalty_est_minmax fulllease_fwp `firm_char' if decision == 1
reg fulllease_fwp exp_royalty_est_minmax `firm_char' if decision == 1
reg share fulllease_fwp exp_royalty_est_minmax `firm_char' if decision == 1

********************************************************************************

replace decision = 0 if decision == .

order leasegroup wellpadID account_num firmNUM grdum firmsigned firmsignedWp decision instyear instmonth firmcountWp firmcountApi
order fulllease_cf_*, last

sort wellpadID leasegroup grdum firmNUM
duplicates drop wellpadID leasegroup grdum, force

********************************************************************************

save `"`pathdta'MATLAB_decision_stacked_full.dta"', replace

* Use the below to create data files that can be used to run the matching model in Matlab

/*
tempfile all
save `all',replace

drop firm_1
outsheet using `"`pathdta'MATLAB_decision_stacked_full_noname_`name'.csv"', comma replace nonames

drop if decision == 0
outsheet using `"`pathdta'MATLAB_decision_stacked_full0_noname_`name'.csv"', comma replace nonames

clear
use `all'

drop if decision == 0
order firm_1, last

outsheet using `"`pathdta'MATLAB_decision_stacked_full0_`name'.csv"', comma replace

